{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 5.15 \u6253\u5370\u4e0d\u5408\u6cd5\u7684\u6587\u4ef6\u540d\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u95ee\u9898\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u4f60\u7684\u7a0b\u5e8f\u83b7\u53d6\u4e86\u4e00\u4e2a\u76ee\u5f55\u4e2d\u7684\u6587\u4ef6\u540d\u5217\u8868\uff0c\u4f46\u662f\u5f53\u5b83\u8bd5\u7740\u53bb\u6253\u5370\u6587\u4ef6\u540d\u7684\u65f6\u5019\u7a0b\u5e8f\u5d29\u6e83\uff0c\n\u51fa\u73b0\u4e86 UnicodeEncodeError \u5f02\u5e38\u548c\u4e00\u6761\u5947\u602a\u7684\u6d88\u606f\u2014\u2014 surrogates not allowed \u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u89e3\u51b3\u65b9\u6848\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5f53\u6253\u5370\u672a\u77e5\u7684\u6587\u4ef6\u540d\u65f6\uff0c\u4f7f\u7528\u4e0b\u9762\u7684\u65b9\u6cd5\u53ef\u4ee5\u907f\u514d\u8fd9\u6837\u7684\u9519\u8bef\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def bad_filename(filename):\n    return repr(filename)[1:-1]\n\ntry:\n    print(filename)\nexcept UnicodeEncodeError:\n    print(bad_filename(filename))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u8ba8\u8bba\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u8fd9\u4e00\u5c0f\u8282\u8ba8\u8bba\u7684\u662f\u5728\u7f16\u5199\u5fc5\u987b\u5904\u7406\u6587\u4ef6\u7cfb\u7edf\u7684\u7a0b\u5e8f\u65f6\u4e00\u4e2a\u4e0d\u592a\u5e38\u89c1\u4f46\u53c8\u5f88\u68d8\u624b\u7684\u95ee\u9898\u3002\n\u9ed8\u8ba4\u60c5\u51b5\u4e0b\uff0cPython\u5047\u5b9a\u6240\u6709\u6587\u4ef6\u540d\u90fd\u5df2\u7ecf\u6839\u636e sys.getfilesystemencoding() \u7684\u503c\u7f16\u7801\u8fc7\u4e86\u3002\n\u4f46\u662f\uff0c\u6709\u4e00\u4e9b\u6587\u4ef6\u7cfb\u7edf\u5e76\u6ca1\u6709\u5f3a\u5236\u8981\u6c42\u8fd9\u6837\u505a\uff0c\u56e0\u6b64\u5141\u8bb8\u521b\u5efa\u6587\u4ef6\u540d\u6ca1\u6709\u6b63\u786e\u7f16\u7801\u7684\u6587\u4ef6\u3002\n\u8fd9\u79cd\u60c5\u51b5\u4e0d\u592a\u5e38\u89c1\uff0c\u4f46\u662f\u603b\u4f1a\u6709\u4e9b\u7528\u6237\u5192\u9669\u8fd9\u6837\u505a\u6216\u8005\u662f\u65e0\u610f\u4e4b\u4e2d\u8fd9\u6837\u505a\u4e86(\n\u53ef\u80fd\u662f\u5728\u4e00\u4e2a\u6709\u7f3a\u9677\u7684\u4ee3\u7801\u4e2d\u7ed9 open() \u51fd\u6570\u4f20\u9012\u4e86\u4e00\u4e2a\u4e0d\u5408\u89c4\u8303\u7684\u6587\u4ef6\u540d)\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5f53\u6267\u884c\u7c7b\u4f3c os.listdir() \u8fd9\u6837\u7684\u51fd\u6570\u65f6\uff0c\u8fd9\u4e9b\u4e0d\u5408\u89c4\u8303\u7684\u6587\u4ef6\u540d\u5c31\u4f1a\u8ba9Python\u9677\u5165\u56f0\u5883\u3002\n\u4e00\u65b9\u9762\uff0c\u5b83\u4e0d\u80fd\u4ec5\u4ec5\u53ea\u662f\u4e22\u5f03\u8fd9\u4e9b\u4e0d\u5408\u683c\u7684\u540d\u5b57\u3002\u800c\u53e6\u4e00\u65b9\u9762\uff0c\u5b83\u53c8\u4e0d\u80fd\u5c06\u8fd9\u4e9b\u6587\u4ef6\u540d\u8f6c\u6362\u4e3a\u6b63\u786e\u7684\u6587\u672c\u5b57\u7b26\u4e32\u3002\nPython\u5bf9\u8fd9\u4e2a\u95ee\u9898\u7684\u89e3\u51b3\u65b9\u6848\u662f\u4ece\u6587\u4ef6\u540d\u4e2d\u83b7\u53d6\u672a\u89e3\u7801\u7684\u5b57\u8282\u503c\u6bd4\u5982 \\xhh\n\u5e76\u5c06\u5b83\u6620\u5c04\u6210Unicode\u5b57\u7b26 \\udchh \u8868\u793a\u7684\u6240\u8c13\u7684\u201d\u4ee3\u7406\u7f16\u7801\u201d\u3002\n\u4e0b\u9762\u4e00\u4e2a\u4f8b\u5b50\u6f14\u793a\u4e86\u5f53\u4e00\u4e2a\u4e0d\u5408\u683c\u76ee\u5f55\u5217\u8868\u4e2d\u542b\u6709\u4e00\u4e2a\u6587\u4ef6\u540d\u4e3ab\u00e4d.txt(\u4f7f\u7528Latin-1\u800c\u4e0d\u662fUTF-8\u7f16\u7801)\u65f6\u7684\u6837\u5b50\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import os\nfiles = os.listdir('.')\nfiles"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5982\u679c\u4f60\u6709\u4ee3\u7801\u9700\u8981\u64cd\u4f5c\u6587\u4ef6\u540d\u6216\u8005\u5c06\u6587\u4ef6\u540d\u4f20\u9012\u7ed9 open() \u8fd9\u6837\u7684\u51fd\u6570\uff0c\u4e00\u5207\u90fd\u80fd\u6b63\u5e38\u5de5\u4f5c\u3002\n\u53ea\u6709\u5f53\u4f60\u60f3\u8981\u8f93\u51fa\u6587\u4ef6\u540d\u65f6\u624d\u4f1a\u78b0\u5230\u4e9b\u9ebb\u70e6(\u6bd4\u5982\u6253\u5370\u8f93\u51fa\u5230\u5c4f\u5e55\u6216\u65e5\u5fd7\u6587\u4ef6\u7b49)\u3002\n\u7279\u522b\u7684\uff0c\u5f53\u4f60\u60f3\u6253\u5370\u4e0a\u9762\u7684\u6587\u4ef6\u540d\u5217\u8868\u65f6\uff0c\u4f60\u7684\u7a0b\u5e8f\u5c31\u4f1a\u5d29\u6e83\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "for name in files:\n    print(name)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u7a0b\u5e8f\u5d29\u6e83\u7684\u539f\u56e0\u5c31\u662f\u5b57\u7b26 \\udce4 \u662f\u4e00\u4e2a\u975e\u6cd5\u7684Unicode\u5b57\u7b26\u3002\n\u5b83\u5176\u5b9e\u662f\u4e00\u4e2a\u88ab\u79f0\u4e3a\u4ee3\u7406\u5b57\u7b26\u5bf9\u7684\u53cc\u5b57\u7b26\u7ec4\u5408\u7684\u540e\u534a\u90e8\u5206\u3002\n\u7531\u4e8e\u7f3a\u5c11\u4e86\u524d\u534a\u90e8\u5206\uff0c\u56e0\u6b64\u5b83\u662f\u4e2a\u975e\u6cd5\u7684Unicode\u3002\n\u6240\u4ee5\uff0c\u552f\u4e00\u80fd\u6210\u529f\u8f93\u51fa\u7684\u65b9\u6cd5\u5c31\u662f\u5f53\u9047\u5230\u4e0d\u5408\u6cd5\u6587\u4ef6\u540d\u65f6\u91c7\u53d6\u76f8\u5e94\u7684\u8865\u6551\u63aa\u65bd\u3002\n\u6bd4\u5982\u53ef\u4ee5\u5c06\u4e0a\u8ff0\u4ee3\u7801\u4fee\u6539\u5982\u4e0b\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "for name in files:\ntry:\n    print(name)\nexcept UnicodeEncodeError:\n    print(bad_filename(name))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5728 bad_filename() \u51fd\u6570\u4e2d\u600e\u6837\u5904\u7f6e\u53d6\u51b3\u4e8e\u4f60\u81ea\u5df1\u3002\n\u53e6\u5916\u4e00\u4e2a\u9009\u62e9\u5c31\u662f\u901a\u8fc7\u67d0\u79cd\u65b9\u5f0f\u91cd\u65b0\u7f16\u7801\uff0c\u793a\u4f8b\u5982\u4e0b\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def bad_filename(filename):\n    temp = filename.encode(sys.getfilesystemencoding(), errors='surrogateescape')\n    return temp.decode('latin-1')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u8bd1\u8005\u6ce8:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "surrogateescape:\n\u8fd9\u79cd\u662fPython\u5728\u7edd\u5927\u90e8\u5206\u9762\u5411OS\u7684API\u4e2d\u6240\u4f7f\u7528\u7684\u9519\u8bef\u5904\u7406\u5668\uff0c\n\u5b83\u80fd\u4ee5\u4e00\u79cd\u4f18\u96c5\u7684\u65b9\u5f0f\u5904\u7406\u7531\u64cd\u4f5c\u7cfb\u7edf\u63d0\u4f9b\u7684\u6570\u636e\u7684\u7f16\u7801\u95ee\u9898\u3002\n\u5728\u89e3\u7801\u51fa\u9519\u65f6\u4f1a\u5c06\u51fa\u9519\u5b57\u8282\u5b58\u50a8\u5230\u4e00\u4e2a\u5f88\u5c11\u88ab\u4f7f\u7528\u5230\u7684Unicode\u7f16\u7801\u8303\u56f4\u5185\u3002\n\u5728\u7f16\u7801\u65f6\u5c06\u90a3\u4e9b\u9690\u85cf\u503c\u53c8\u8fd8\u539f\u56de\u539f\u5148\u89e3\u7801\u5931\u8d25\u7684\u5b57\u8282\u5e8f\u5217\u3002\n\u5b83\u4e0d\u4ec5\u5bf9\u4e8eOS API\u975e\u5e38\u6709\u7528\uff0c\u4e5f\u80fd\u5f88\u5bb9\u6613\u7684\u5904\u7406\u5176\u4ed6\u60c5\u51b5\u4e0b\u7684\u7f16\u7801\u9519\u8bef\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u4f7f\u7528\u8fd9\u4e2a\u7248\u672c\u4ea7\u751f\u7684\u8f93\u51fa\u5982\u4e0b\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "for name in files:\n    try:\n        print(name)\n    except UnicodeEncodeError:\n        print(bad_filename(name))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u8fd9\u4e00\u5c0f\u8282\u4e3b\u9898\u53ef\u80fd\u4f1a\u88ab\u5927\u90e8\u5206\u8bfb\u8005\u6240\u5ffd\u7565\u3002\u4f46\u662f\u5982\u679c\u4f60\u5728\u7f16\u5199\u4f9d\u8d56\u6587\u4ef6\u540d\u548c\u6587\u4ef6\u7cfb\u7edf\u7684\u5173\u952e\u4efb\u52a1\u7a0b\u5e8f\u65f6\uff0c\n\u5c31\u5fc5\u987b\u5f97\u8003\u8651\u5230\u8fd9\u4e2a\u3002\u5426\u5219\u4f60\u53ef\u80fd\u4f1a\u5728\u67d0\u4e2a\u5468\u672b\u88ab\u53eb\u5230\u529e\u516c\u5ba4\u53bb\u8c03\u8bd5\u4e00\u4e9b\u4ee4\u4eba\u8d39\u89e3\u7684\u9519\u8bef\u3002"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.1"
    },
    "toc": {
      "base_numbering": 1,
      "nav_menu": {},
      "number_sections": true,
      "sideBar": true,
      "skip_h1_title": true,
      "title_cell": "Table of Contents",
      "title_sidebar": "Contents",
      "toc_cell": false,
      "toc_position": {},
      "toc_section_display": true,
      "toc_window_display": true
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}